﻿using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace Commen
{
    /// <summary>
    /// Helpers for downloading HTML pages and pulling simple values out of markup
    /// with regular expressions.
    /// </summary>
    /// <remarks>
    /// The extraction helpers are regex based and only handle simple, non-nested markup;
    /// they are not a substitute for a real HTML parser. The download helpers are
    /// best-effort: any failure is reported as an empty string rather than an exception.
    /// </remarks>
    public class HtmlHelper
    {
        // Header value only: the "User-Agent:" header name is added by HttpWebRequest itself.
        private const string BrowserUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";

        private const int RequestTimeoutMilliseconds = 5000;

        private const int ConnectionLimit = 30;

        // Whitespace at the start of every line (Multiline makes ^ match after each '\n').
        private static readonly Regex LeadingWhitespace = new Regex("^\\s*", RegexOptions.Compiled | RegexOptions.Multiline);

        // Any run of CR and/or LF characters, so CRLF, LF-only and CR-only input are all handled.
        private static readonly Regex LineBreaks = new Regex("[\\r\\n]+", RegexOptions.Compiled);

        /// <summary>
        /// Returns the text content of the first <paramref name="title"/> element found in
        /// <paramref name="str"/>.
        /// </summary>
        /// <param name="str">Markup to search.</param>
        /// <param name="title">Tag name, e.g. <c>title</c>. Treated as literal text and matched case-insensitively.</param>
        /// <returns>
        /// The text between the opening and closing tag, or an empty string when either argument
        /// is null/empty or no matching element is found. Only elements whose content contains
        /// no child tags (no '&lt;' character) can match.
        /// </returns>
        public static string GetTitleContent(string str, string title)
        {
            if (string.IsNullOrEmpty(str) || string.IsNullOrEmpty(title))
            {
                return string.Empty;
            }

            // The look-ahead requires the tag name to end here, so "a" does not match "<abbr>".
            string pattern = string.Format("<{0}(?=[\\s/>])[^>]*>(?<Text>[^<]*)</{0}\\s*>", Regex.Escape(title));
            Match match = Regex.Match(str, pattern, RegexOptions.IgnoreCase);

            // An unsuccessful match yields an empty group value.
            return match.Groups["Text"].Value;
        }

        /// <summary>
        /// Returns the value of attribute <paramref name="attrib"/> on the first
        /// <paramref name="title"/> tag in <paramref name="str"/> that carries it.
        /// </summary>
        /// <param name="str">Markup to search.</param>
        /// <param name="title">Tag name, e.g. <c>a</c>. Treated as literal text and matched case-insensitively.</param>
        /// <param name="attrib">Attribute name, e.g. <c>href</c>. Treated as literal text and matched case-insensitively.</param>
        /// <returns>
        /// The attribute value, or an empty string when any argument is null/empty or nothing matches.
        /// The value may be unquoted or wrapped in single or double quotes, and must not contain
        /// whitespace, quotes or '&gt;'.
        /// </returns>
        public static string GetTitleContent(string str, string title, string attrib)
        {
            if (string.IsNullOrEmpty(str) || string.IsNullOrEmpty(title) || string.IsNullOrEmpty(attrib))
            {
                return string.Empty;
            }

            // "\s" directly before the attribute name keeps "href" from matching inside "data-href".
            // Group 1 captures the optional opening quote and "\1" requires the same closing quote.
            string pattern = string.Format(
                "<{0}(?=\\s)[^>]*?\\s{1}\\s*=\\s*(['\"]?)(?<url>[^'\"\\s>]+)\\1[^>]*>",
                Regex.Escape(title),
                Regex.Escape(attrib));
            Match match = Regex.Match(str, pattern, RegexOptions.IgnoreCase);

            return match.Groups["url"].Value;
        }

        /// <summary>
        /// Collapses markup onto a single line: strips the leading whitespace of every line
        /// and then removes all line breaks.
        /// </summary>
        /// <param name="html">Markup to compact.</param>
        /// <returns>The compacted markup, or an empty string when <paramref name="html"/> is null or empty.</returns>
        public static string ReplaceEmpty(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            string withoutIndent = LeadingWhitespace.Replace(html, string.Empty);
            return LineBreaks.Replace(withoutIndent, string.Empty);
        }

        #region Fetch HTML page content (GET)
        /// <summary>
        /// Downloads <paramref name="url"/> with an HTTP GET and returns the response body as text.
        /// </summary>
        /// <param name="url">Address to fetch.</param>
        /// <param name="encod">Name of the encoding used to decode the response body.</param>
        /// <returns>
        /// The decoded response body, or an empty string when <paramref name="url"/> is null/empty
        /// or the request fails for any reason (bad URL, unknown encoding, timeout, HTTP error status, ...).
        /// </returns>
        public static string GetHtmlContent(string url,string encod="utf-8")
        {
            if (string.IsNullOrEmpty(url))
            {
                return string.Empty;
            }

            try
            {
                // Resolve the encoding first so an invalid name fails before any network traffic.
                Encoding responseEncoding = Encoding.GetEncoding(encod);
                HttpWebRequest request = CreateRequest(url, "GET");
                return ReadResponseText(request, responseEncoding);
            }
            catch (Exception ex)
            {
                // Best-effort contract: callers get "" on failure, but the cause is traced instead of silently dropped.
                System.Diagnostics.Trace.TraceWarning("HtmlHelper.GetHtmlContent failed for {0}: {1}", url, ex.Message);
                return string.Empty;
            }
        }
        #endregion

        #region Post form data and fetch the response
        /// <summary>
        /// Sends <paramref name="strPostdata"/> to <paramref name="URL"/> as an
        /// <c>application/x-www-form-urlencoded</c> HTTP POST and returns the response body as text.
        /// </summary>
        /// <param name="URL">Address to post to.</param>
        /// <param name="strPostdata">Request body, expected to be already form-encoded by the caller.</param>
        /// <param name="strEncoding">
        /// Name of the encoding used both to encode the request body and to decode the response body.
        /// </param>
        /// <returns>
        /// The decoded response body, or an empty string when <paramref name="URL"/> is null/empty,
        /// <paramref name="strPostdata"/> is null, or the request fails for any reason.
        /// </returns>
        public static string PostHtmlContent(string URL, string strPostdata, string strEncoding = "gbk")
        {
            if (string.IsNullOrEmpty(URL) || strPostdata == null)
            {
                return string.Empty;
            }

            try
            {
                // Use the caller-supplied encoding for the body as well; Encoding.Default varies per machine.
                Encoding encoding = Encoding.GetEncoding(strEncoding);
                byte[] body = encoding.GetBytes(strPostdata);

                HttpWebRequest request = CreateRequest(URL, "POST");
                request.ContentType = "application/x-www-form-urlencoded";
                request.ContentLength = body.Length;

                // The request stream must be closed before the response is requested,
                // otherwise the connection is not released.
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(body, 0, body.Length);
                }

                return ReadResponseText(request, encoding);
            }
            catch (Exception ex)
            {
                // Best-effort contract: callers get "" on failure, but the cause is traced instead of silently dropped.
                System.Diagnostics.Trace.TraceWarning("HtmlHelper.PostHtmlContent failed for {0}: {1}", URL, ex.Message);
                return string.Empty;
            }
        }
        #endregion

        /// <summary>
        /// Creates an <see cref="HttpWebRequest"/> carrying the headers and limits shared by the GET and POST helpers.
        /// </summary>
        private static HttpWebRequest CreateRequest(string url, string method)
        {
            // Process-wide setting; assigned before the request exists so it also applies
            // to a connection group created for this request.
            ServicePointManager.DefaultConnectionLimit = ConnectionLimit;

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = method;
            // Persistent (keep-alive) connections are requested.
            request.KeepAlive = true;
            request.UserAgent = BrowserUserAgent;
            request.Accept = "*/*";
            request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
            request.ServicePoint.Expect100Continue = false;
            request.Timeout = RequestTimeoutMilliseconds;
            // Follow HTTP redirects automatically.
            request.AllowAutoRedirect = true;
            return request;
        }

        /// <summary>
        /// Sends the request, reads the whole response body with <paramref name="encoding"/>
        /// and disposes the response, stream and reader.
        /// </summary>
        private static string ReadResponseText(HttpWebRequest request, Encoding encoding)
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, encoding))
            {
                return reader.ReadToEnd();
            }
        }
    }


}
